from pyspark import SparkConf, SparkContext

if __name__ == '__main__':
    # Build the SparkConf object (application name "test", master "local[*]").
    conf = SparkConf().setAppName("test").setMaster("local[*]")
    # Build the SparkContext, the entry point to the Spark execution environment.
    sc = SparkContext(conf=conf)

    try:
        numbers = sc.parallelize([1, 2, 3, 4, 5, 6])

        # filter: keeps only the records for which the predicate holds.
        #   f: (T) -> bool
        # The predicate receives one element (of any type) and returns a bool;
        # here it keeps the odd numbers.
        odd_numbers = numbers.filter(lambda x: x % 2 == 1)

        print(odd_numbers.collect())
    finally:
        # Always shut the context down, even when the job above raises, so
        # the resources held by the SparkContext are released.
        sc.stop()
